package com.facebook.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.text.Normalizer;
/**
* Perform unicode normalization on a string. See
* http://unicode.org/reports/tr15/ for details on what unicode normalization
* entails. The following normalization forms are supported (passed in via the
* second argument as a string):
* NFC
* NFD
* NFKC
* NFKD
*/
@Description(name = "udfnormalizeunicode",
value = "_FUNC_(string, form) - Normalization the unicode 'string' to 'form.'")
public class UDFNormalizeUnicode extends UDF {
public String evaluate(String s, String form) {
if (s == null || form == null) {
return null;
}
if (form.equals("NFC")) {
return Normalizer.normalize(s, Normalizer.Form.NFC);
} else if (form.equals("NFD")) {
return Normalizer.normalize(s, Normalizer.Form.NFD);
} else if (form.equals("NFKC")) {
return Normalizer.normalize(s, Normalizer.Form.NFKC);
} else if (form.equals("NFKD")) {
return Normalizer.normalize(s, Normalizer.Form.NFKD);
} else {
return null;
}
}
}